{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# High-level Theano + Lasagne Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting /home/iliauk/.theanorc\n"
     ]
    }
   ],
   "source": [
    "%%writefile ~/.theanorc\n",
    "[global]\n",
    "device = cuda0\n",
    "force_device= True\n",
    "floatX = float32\n",
    "warn_float64 = warn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using cuDNN version 6021 on context None\n",
      "Mapped name None to device cuda0: Tesla P100-PCIE-16GB (BC4B:00:00.0)\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import os\n",
    "import sys\n",
    "import theano.tensor as T\n",
    "import theano\n",
    "import lasagne\n",
    "import lasagne.layers as L\n",
    "import lasagne.nonlinearities as nl\n",
    "import lasagne.objectives as obj\n",
    "import lasagne.updates as upd\n",
    "from common.params import *\n",
    "from common.utils import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Performance Improvement\n",
    "# 1. CuDNN auto-tune\n",
    "theano.config.dnn.conv.algo_fwd = \"time_once\"\n",
    "theano.config.dnn.conv.algo_bwd_filter = \"time_once\"\n",
    "theano.config.dnn.conv.algo_bwd_data = \"time_once\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OS:  linux\n",
      "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
      "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
      "Numpy:  1.14.1\n",
      "Theano:  1.0.1\n",
      "Lasagne:  0.2.dev1\n",
      "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
      "CUDA Version 8.0.61\n",
      "CuDNN Version  6.0.21\n"
     ]
    }
   ],
   "source": [
    "print(\"OS: \", sys.platform)\n",
    "print(\"Python: \", sys.version)\n",
    "print(\"Numpy: \", np.__version__)\n",
    "print(\"Theano: \", theano.__version__)\n",
    "print(\"Lasagne: \", lasagne.__version__)\n",
    "print(\"GPU: \", get_gpu_name())\n",
    "print(get_cuda_version())\n",
    "print(\"CuDNN Version \", get_cudnn_version())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_symbol(n_classes=N_CLASSES):\n",
    "    conv1 = L.Conv2DLayer(X, num_filters=50, filter_size=(3, 3), pad='same')\n",
    "    conv2 = L.Conv2DLayer(conv1, num_filters=50, filter_size=(3, 3), pad='same')\n",
    "    pool1 = L.MaxPool2DLayer(conv2, pool_size=(2, 2), stride=(2, 2))\n",
    "    drop1 = L.DropoutLayer(pool1, 0.25)\n",
    "    \n",
    "    conv3 = L.Conv2DLayer(drop1, num_filters=100, filter_size=(3, 3), pad='same')\n",
    "    conv4 = L.Conv2DLayer(conv3, num_filters=100, filter_size=(3, 3), pad='same')\n",
    "    pool2 = L.MaxPool2DLayer(conv4, pool_size=(2, 2), stride=(2, 2))\n",
    "    drop2 = L.DropoutLayer(pool2, 0.25)\n",
    "    \n",
    "    flatten = L.FlattenLayer(drop2)\n",
    "    fc1 = L.DenseLayer(flatten, 512)\n",
    "    drop4 = L.DropoutLayer(fc1, 0.5)\n",
    "    pred = L.DenseLayer(drop4, n_classes, name=\"output\", nonlinearity=nl.softmax)\n",
    "    \n",
    "    return pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def init_model(net, y, lr=LR, momentum=MOMENTUM):\n",
    "    pred = L.get_output(net)\n",
    "    params = L.get_all_params(net)\n",
    "    xentropy = obj.categorical_crossentropy(pred, y)\n",
    "    loss = T.mean(xentropy)\n",
    "    # The tensorflow LR, MOMENTUM are slightly different\n",
    "    updates = upd.momentum(loss, params, lr, momentum)\n",
    "    return pred, loss, updates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preparing train set...\n",
      "Preparing test set...\n",
      "(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)\n",
      "float32 float32 int32 int32\n",
      "CPU times: user 653 ms, sys: 595 ms, total: 1.25 s\n",
      "Wall time: 1.26 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Data into format for library\n",
    "x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)\n",
    "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
    "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 50.6 ms, sys: 3.33 ms, total: 54 ms\n",
      "Wall time: 53.2 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Place-holders\n",
    "X = L.InputLayer(shape=(None, 3, 32, 32))\n",
    "y = T.ivector(\"y\")\n",
    "# Initialise model\n",
    "net = create_symbol()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 259 ms, sys: 41.5 ms, total: 300 ms\n",
      "Wall time: 766 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "pred, loss, updates = init_model(net, y)\n",
    "# Accuracy for logging\n",
    "accuracy = obj.categorical_accuracy(pred, y)\n",
    "accuracy = T.mean(T.cast(accuracy, theano.config.floatX))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2.93 s, sys: 424 ms, total: 3.36 s\n",
      "Wall time: 13.5 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Compile functions\n",
    "train_func = theano.function([X.input_var, y], [loss, accuracy], updates=updates)\n",
    "pred = L.get_output(net, deterministic=True)\n",
    "pred_func = theano.function([X.input_var], T.argmax(pred, axis=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 Train accuracy: 0.375\n",
      "1 Train accuracy: 0.640625\n",
      "2 Train accuracy: 0.578125\n",
      "3 Train accuracy: 0.6875\n",
      "4 Train accuracy: 0.6875\n",
      "5 Train accuracy: 0.78125\n",
      "6 Train accuracy: 0.703125\n",
      "7 Train accuracy: 0.734375\n",
      "8 Train accuracy: 0.671875\n",
      "9 Train accuracy: 0.765625\n",
      "CPU times: user 40 s, sys: 24.3 s, total: 1min 4s\n",
      "Wall time: 1min 5s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Main training loop: 1m5s\n",
    "for j in range(EPOCHS):\n",
    "    for data, label in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n",
    "        loss, acc_train = train_func(data, label)\n",
    "    # Log\n",
    "    print(j, \"Train accuracy:\", acc_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 200 ms, sys: 120 ms, total: 319 ms\n",
      "Wall time: 319 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Main evaluation loop: 319s\n",
    "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n",
    "y_guess = np.zeros(n_samples, dtype=np.int)\n",
    "y_truth = y_test[:n_samples]\n",
    "c = 0\n",
    "for data, label in yield_mb(x_test, y_test, BATCHSIZE):\n",
    "    output = pred_func(data)\n",
    "    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = output\n",
    "    c += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy:  0.7606169871794872\n"
     ]
    }
   ],
   "source": [
    "print(\"Accuracy: \", 1.*sum(y_guess == y_truth)/len(y_guess))"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
